import os
import csv
from io import BytesIO
from io import TextIOWrapper
from zipfile import ZipFile
from urllib.request import urlopen
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
import numpy as np
import seaborn as sns
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
pd.options.display.max_columns = None
def get_current_working_directory():
"""
:return:
"""
current_path = os.getcwd()
print("\n" + "Current Working Directory Set As: " + str(current_path) + "\n")
return current_path
def change_current_working_directory(directory):
"""
:param directory:
:return:
"""
if os.path.exists(directory):
os.chdir(directory)
print("\n" + "Working Directory Changed To: " + str(directory) + "\n")
else:
print("\n" + "Directory Does Not Exists. Working Directory Have Not Been Changed." + "\n")
return str(os.getcwd())
def create_new_working_directory(directory):
"""
:param directory:
:return:
"""
if os.path.exists(directory):
print("\n" + "Directory: " + str(directory) + " Already Exists." + "\n")
else:
os.mkdir(directory)
print("\n" + "New Directory Created: " + str(directory) + "\n")
def get_in_memory_data(url,file_name):
zip_files = []
resp = urlopen(url)
with ZipFile(BytesIO(resp.read())) as zf:
with zf.open(file_name, 'r') as infile:
df = pd.read_csv(zf.open(file_name), delimiter=";")
return df
def get_in_memory_data(url):
zip_files = []
resp = urlopen(url)
with ZipFile(BytesIO(resp.read())) as zf:
text_files = zf.infolist()
for text_file in text_files:
with zf.open(text_file, 'r') as infile:
df = pd.read_csv(zf.open(text_file.filename), delimiter=";")
return df
def get_list_of_files_from_directory(directory):
"""
:param directory:
:return:
"""
list_of_files = []
for item in os.listdir(directory):
list_of_files.append(item)
print("\n" + "List Of Files: " + str(list_of_files) + "\n" + "From Directory: " + str(directory) + "\n")
return list_of_files
def df_to_csv(object_name, directory, csv_file_name):
"""
:param object_name:
:param directory:
:param csv_file_name:
:return:
"""
#object_name = object_name.reset_index(inplace=True)
object_name.to_csv(directory + csv_file_name + ".csv", header=True, index=False)
print("\n" + str(csv_file_name) + ".csv Written Under Directory: " + str(directory) + "\n")
change_current_working_directory("C:\\Users\\KonuTech\\Downloads")
create_new_working_directory(directory=get_current_working_directory() + "\\election\\")
change_current_working_directory(get_current_working_directory() + "\\election\\")
get_current_working_directory()
candidates_of_first_round = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/kandydaci_csv.zip")
candidates_of_first_round = candidates_of_first_round.set_index('Nazwisko')
candidates_of_first_round.head(3)
candidates_of_first_round['Procent wszystkich głosów'] = candidates_of_first_round['Procent wszystkich głosów'].apply(lambda x: x.replace(',','.'))
candidates_of_first_round['Procent wszystkich głosów'] = pd.to_numeric(candidates_of_first_round['Procent wszystkich głosów'],errors='coerce')
candidates_of_first_round.sort_values('Procent wszystkich głosów',inplace=True)
sns.set(font_scale=1.2)
candidates_of_first_round["Procent wszystkich głosów"].plot(kind='barh', stacked=True, colormap='jet', figsize=(12,8)).legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.title("Results of FIRST presidential election round in percentages")
plt.xlabel("Candidates")
plt.show()
voting_wards_after_the_first_round_of_elections = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/obwody_glosowania_csv.zip")
voting_wards_after_the_first_round_of_elections.head(3)
composition_of_precinct_election_commissions_in_the_first_round_of_elections = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/sklad_komisji_obwodowych_csv.zip")
composition_of_precinct_election_commissions_in_the_first_round_of_elections.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_precincts = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_obwodach_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_precincts.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_precincts = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_obwodach_proc_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_precincts.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_communes = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_gminach_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_communes.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_communes = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_gminach_proc_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_communes.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_poviats = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_powiatach_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_poviats.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_poviats = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_powiatach_proc_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_poviats.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_voivodship = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_wojewodztwach_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_by_voivodship.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/1/csv/wyniki_gl_na_kand_po_wojewodztwach_proc_csv.zip")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship.head(3)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship['Procent wszystkich głosów'] = results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship['Robert BIEDROŃ'].apply(lambda x: x.replace(',','.'))
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship['Procent wszystkich głosów'] = pd.to_numeric(results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship['Robert BIEDROŃ'],errors='coerce')
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted = results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship.melt(id_vars=["Kod TERYT", "Województwo", "Frekwencja", "% głosów nieważnych", "W tym z powodu postawienia znaku „X” obok nazwiska dwóch lub większej liczby kandydatów", "W tym z powodu niepostawienia znaku „X” obok nazwiska żadnego kandydata", "W tym z powodu postawienia znaku „X” wyłącznie obok skreślonego nazwiska kandydata", "% głosów ważnych", "Liczba obwodów"],
var_name="Candidate",
value_name="Percentage of votes")
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted.info()
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted.dropna(inplace=True)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted['Percentage of votes'] = (results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted['Percentage of votes'].replace('\.','', regex=True)
.replace(',','.', regex=True)
.astype(float))
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted.info()
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted_subset = results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted[["Województwo", "Candidate", "Percentage of votes"]]
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted_subset
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked = results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_melted_subset.set_index(["Województwo", "Candidate"])
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked = results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked.unstack(level=-1)
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked
results_of_voting_for_candidates_in_the_FIRST_round_of_elections_percentage_by_voivodship_unstacked.plot(kind='bar', stacked=True, colormap='jet', figsize=(12,8)).legend(loc='center left', bbox_to_anchor=(1, 0.5))
#plt.title("Frequency of genres by year 2000 - 2013. So far for data after 2013 is undersampled")
#plt.show()
candidates_of_second_round = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/kandydaci_csv.zip")
candidates_of_second_round.head(3)
candidates_of_second_round['Procent wszystkich głosów'] = candidates_of_second_round['Procent wszystkich głosów'].apply(lambda x: x.replace(',','.'))
candidates_of_second_round['Procent wszystkich głosów'] = pd.to_numeric(candidates_of_second_round['Procent wszystkich głosów'],errors='coerce')
labels = candidates_of_second_round["Nazwisko"]
sizes = candidates_of_second_round["Procent wszystkich głosów"]
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, autopct='%1.f%%', shadow=True, startangle=90, labeldistance=0.8)
plt.show()
voting_wards_after_the_second_round_of_elections = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/obwody_glosowania_csv.zip")
voting_wards_after_the_second_round_of_elections.head(3)
composition_of_precinct_election_commissions_in_the_first_round_of_elections = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/sklad_komisji_obwodowych_csv.zip")
composition_of_precinct_election_commissions_in_the_first_round_of_elections.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_precincts = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_obwodach_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_precincts.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_precincts = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_obwodach_proc_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_precincts.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_communes = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_gminach_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_communes.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_gminach_proc_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_poviats = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_powiatach_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_poviats.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_poviats = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_powiatach_proc_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_poviats.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_voivoidship = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_wojewodztwach_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_by_voivoidship.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_voivoidship = get_in_memory_data(url="https://wybory.gov.pl/prezydent20200628/data/2/csv/wyniki_gl_na_kand_po_wojewodztwach_proc_csv.zip")
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_voivoidship.head(3)
get_current_working_directory()
get_list_of_files_from_directory(get_current_working_directory())
gus_population = 'LUDN_2137_XTAB_20200916001852.xlsx'
gus_unemployment = 'RYNE_1944_XTAB_20200916002137.xlsx'
population = pd.read_excel(gus_population, sheet_name="TABLICA")
population.dropna(inplace=True)
population["Kod"]=population["Kod"].astype(int)
population["population"]=population["ogółem"].astype(int)
population = population[['Kod','population']]
population.info()
unemployment = pd.read_excel(gus_unemployment, sheet_name="TABLICA")
unemployment.dropna(inplace=True)
unemployment["Kod"]=unemployment["Kod"].astype(int)
unemployment["unemployment"]=unemployment["ogółem"].astype(int)
unemployment = unemployment[['Kod','unemployment']]
unemployment.info()
gus_data = pd.merge(population, unemployment, how='inner', on='Kod')
gus_data
gus_data['unemployment_rate'] = 100 * gus_data['unemployment'] / gus_data['population']
gus_data
get_current_working_directory()
gus_data["Kod_commune"] = gus_data["Kod"].apply(lambda x: '0'+str(x) if len(str(x)) < 7 else str(x))
gus_data.head()
gus_data['Kod_voivoidship'] = gus_data["Kod_commune"].apply(lambda s: s[:2])
gus_data.head()
gus_data = gus_data[gus_data['Kod'] != '0']
# Voivoideships
gus_data_voivoidships = gus_data[gus_data["Kod_commune"].str[2:7] == '00000']
gus_data_voivoidships
# Communes
gus_data_communes = gus_data[gus_data["Kod_commune"].str[4:7] != '000']
gus_data_communes
voivoidships_map = gpd.read_file('C:\\Users\\KonuTech\\Downloads\\election\\jednostki_administracyjne\\Województwa.shp')
#voivoidships_map.dtypes
communes_map = gpd.read_file('C:\\Users\\KonuTech\\Downloads\\election\\jednostki_administracyjne\\gminy.shp')
#communes_map.dtypes
voivoidships_map = voivoidships_map[['JPT_KOD_JE', "geometry"]]
voivoidships_map.head()
communes_map = communes_map[['JPT_KOD_JE', "geometry"]]
communes_map.head()
voivoidships_map_data = pd.merge(voivoidships_map, gus_data_voivoidships, how='left', left_on='JPT_KOD_JE', right_on='Kod_voivoidship')
voivoidships_map_data
communes_map_data = pd.merge(communes_map, gus_data_communes, how='left', left_on='JPT_KOD_JE', right_on='Kod_commune')
communes_map_data
communes_map_data.info()
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes.head(3)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes["Kod TERYT"]=results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes["Kod TERYT"].astype(str)
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Andrzej Sebastian DUDA'] = results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Andrzej Sebastian DUDA'].apply(lambda x: x.replace(',','.'))
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Andrzej Sebastian DUDA'] = pd.to_numeric(results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Andrzej Sebastian DUDA'],errors='coerce')
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Rafał Kazimierz TRZASKOWSKI'] = results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Rafał Kazimierz TRZASKOWSKI'].apply(lambda x: x.replace(',','.'))
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Rafał Kazimierz TRZASKOWSKI'] = pd.to_numeric(results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes['Rafał Kazimierz TRZASKOWSKI'],errors='coerce')
results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes.info()
pkw_data = results_of_voting_for_candidates_in_the_SECOND_round_of_elections_percentage_by_communes[["Kod TERYT", "Rodzaj jednostki", "Andrzej Sebastian DUDA", "Rafał Kazimierz TRZASKOWSKI"]]
pkw_data
pkw_data["Rodzaj jednostki"].unique()
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "gmina miejska", 'Rodzaj jednostki number'] = '1'
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "gmina wiejska", 'Rodzaj jednostki number'] = '2'
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "gmina miejsko-wiejska", 'Rodzaj jednostki number'] = '3'
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "dzielnica w m.st. Warszawa", 'Rodzaj jednostki number'] = '8'
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "statki", 'Rodzaj jednostki number'] = '9'
pkw_data.loc[pkw_data['Rodzaj jednostki'] == "zagranica", 'Rodzaj jednostki number'] = '9'
pkw_data['Kod TERYT'] = pkw_data['Kod TERYT'].str.cat(pkw_data['Rodzaj jednostki number'],sep="")
pkw_data
# Gmina Bolesławiec, Dolnośląskie, Gmina miejska
pkw_data[pkw_data["Kod TERYT"] == "201011"]
pkw_data["Kod_commune"] = pkw_data["Kod TERYT"].apply(lambda x: '0'+str(x) if len(str(x)) < 7 else str(x))
pkw_data.head()
# Communes
pkw_data_communes = pkw_data[pkw_data["Kod_commune"].str[4:7] != '000']
pkw_data_communes
# Gmina Bolesławiec, Dolnośląskie, Gmina miejska
# https://romek.info/ut/dolnoslaskie.html
communes_map[communes_map["JPT_KOD_JE"] == "0201011"]
communes_map_data = pd.merge(communes_map, pkw_data_communes, how='left', left_on='JPT_KOD_JE', right_on='Kod_commune')
communes_map_data
communes_map_data.info()
communes_map_data["Andrzej Sebastian DUDA"].unique()
communes_map_data['Dudas percentage of votes'] = communes_map_data['Andrzej Sebastian DUDA'] / 100
communes_map_data['Trzaskowskis percentage of votes'] = communes_map_data['Rafał Kazimierz TRZASKOWSKI'] / 100
communes_map_data['Difference in percentage points'] = communes_map_data['Trzaskowskis percentage of votes'] - communes_map_data['Dudas percentage of votes']
communes_map_data['Who won'] = communes_map_data['Dudas percentage of votes'].apply(lambda x: 1 if x > 0.5 else 0)
communes_map_data
communes_map_data.geometry = communes_map_data.geometry.simplify(0.005) # mniejsza wartosc = bardziej dokładnie
communes_map_data_json = communes_map_data.to_json()
mapa = folium.Map([52, 19], zoom_start=7)
folium.Choropleth(geo_data=communes_map_data_json,
data=communes_map_data,
columns=['Kod_commune', 'Dudas percentage of votes'],
key_on='feature.properties.JPT_KOD_JE',
fill_color='YlOrRd',
fill_opacity=1,
line_opacity=0.2,
legend_name="Dudas support by communes").add_to(mapa)
mapa.save(outfile = 'dudas_support_by_communes.html')
mapa
communes_map_data.geometry = communes_map_data.geometry.simplify(0.005) # mniejsza wartosc = bardziej dokładnie
communes_map_data_json = communes_map_data.to_json()
mapa = folium.Map([52, 19], zoom_start=7)
folium.Choropleth(geo_data=communes_map_data_json,
data=communes_map_data,
columns=['Kod_commune', 'Trzaskowskis percentage of votes'],
key_on='feature.properties.JPT_KOD_JE',
fill_color='Blues',
fill_opacity=1,
line_opacity=0.2,
legend_name="Trzaskowskis support by communes").add_to(mapa)
mapa.save(outfile = 'trzaskowskis_support_by_communes.html')
mapa
communes_map_data.geometry = communes_map_data.geometry.simplify(0.005) # mniejsza wartosc = bardziej dokładnie
communes_map_data_json = communes_map_data.to_json()
mapa = folium.Map([52, 19], zoom_start=7)
folium.Choropleth(geo_data=communes_map_data_json,
data=communes_map_data,
columns=['Kod_commune', 'Difference in percentage points'],
key_on='feature.properties.JPT_KOD_JE',
fill_color='RdBu',
fill_opacity=1,
line_opacity=0.2,
legend_name="Dudas advantege over Trzaskowski").add_to(mapa)
mapa.save(outfile = 'dudas_advantege_over_trzaskowski.html')
mapa
communes_map_data.geometry = communes_map_data.geometry.simplify(0.005) # mniejsza wartosc = bardziej dokładnie
communes_map_data_json = communes_map_data.to_json()
mapa = folium.Map([52, 19], zoom_start=7)
folium.Choropleth(geo_data=communes_map_data_json,
data=communes_map_data,
columns=['Kod_commune', 'Who won'],
key_on='feature.properties.JPT_KOD_JE',
fill_color='YlOrRd',
fill_opacity=1,
line_opacity=0.2,
legend_name="Who won by communes - red: Duda").add_to(mapa)
mapa.save(outfile = 'dudas_advantege_over_trzaskowski.html')
mapa
# https://stat.gov.pl/statystyki-eksperymentalne/obszary-funkcjonalne-oraz-dostepnosc-terytorialna/wskazniki-dostepnosci-terytorialnej-mieszkancow-polski-do-wybranych-obiektow-uzytecznosci-publicznej,2,1.html
get_current_working_directory()
get_list_of_files_from_directory(get_current_working_directory())
indicators_of_territorial_accessibility = 'Wskazniki_dostepnosci_terytorialnej.xlsx'
territorial_accessibility_metadata = pd.read_excel(indicators_of_territorial_accessibility, sheet_name="METADANE")
territorial_accessibility_metadata
territorial_accessibility = pd.read_excel(indicators_of_territorial_accessibility, sheet_name="ODLEGLOSC")
territorial_accessibility["TERYT"] = territorial_accessibility.astype(str)
territorial_accessibility["Kod_commune"] = territorial_accessibility["TERYT"].apply(lambda x: '0' + str(x) if len(str(x)) < 7 else str(x))
territorial_accessibility.head(3)
territorial_accessibility.info()
communes_map_data.head(3)
communes_territorial_accessibility = pd.merge(communes_map_data, territorial_accessibility, how='left', left_on='JPT_KOD_JE', right_on='Kod_commune')
communes_territorial_accessibility
communes_territorial_accessibility["W1_percent"] = communes_territorial_accessibility["W1"] / communes_territorial_accessibility["W1"].max()
communes_territorial_accessibility["W2_percent"] = communes_territorial_accessibility["W2"] / communes_territorial_accessibility["W2"].max()
communes_territorial_accessibility["W3_percent"] = communes_territorial_accessibility["W3"] / communes_territorial_accessibility["W3"].max()
communes_territorial_accessibility["W4_percent"] = communes_territorial_accessibility["W4"] / communes_territorial_accessibility["W4"].max()
communes_territorial_accessibility["W5_percent"] = communes_territorial_accessibility["W5"] / communes_territorial_accessibility["W5"].max()
communes_territorial_accessibility["W6_percent"] = communes_territorial_accessibility["W6"] / communes_territorial_accessibility["W6"].max()
communes_territorial_accessibility["W7_percent"] = communes_territorial_accessibility["W7"] / communes_territorial_accessibility["W7"].max()
communes_territorial_accessibility["W8_percent"] = communes_territorial_accessibility["W8"] / communes_territorial_accessibility["W8"].max()
communes_territorial_accessibility["W9_percent"] = communes_territorial_accessibility["W9"] / communes_territorial_accessibility["W9"].max()
communes_territorial_accessibility["W10_percent"] = communes_territorial_accessibility["W10"] / communes_territorial_accessibility["W10"].max()
communes_territorial_accessibility["W11_percent"] = communes_territorial_accessibility["W11"] / communes_territorial_accessibility["W11"].max()
communes_territorial_accessibility["W12_percent"] = communes_territorial_accessibility["W12"] / communes_territorial_accessibility["W12"].max()
communes_territorial_accessibility["W13_percent"] = communes_territorial_accessibility["W13"] / communes_territorial_accessibility["W13"].max()
communes_territorial_accessibility["W14_percent"] = communes_territorial_accessibility["W14"] / communes_territorial_accessibility["W14"].max()
communes_territorial_accessibility["W15_percent"] = communes_territorial_accessibility["W15"] / communes_territorial_accessibility["W15"].max()
communes_territorial_accessibility["W16_percent"] = communes_territorial_accessibility["W16"] / communes_territorial_accessibility["W16"].max()
communes_territorial_accessibility["W17_percent"] = communes_territorial_accessibility["W17"] / communes_territorial_accessibility["W17"].max()
communes_territorial_accessibility["W18_percent"] = communes_territorial_accessibility["W18"] / communes_territorial_accessibility["W18"].max()
communes_territorial_accessibility["W19_percent"] = communes_territorial_accessibility["W19"] / communes_territorial_accessibility["W19"].max()
communes_territorial_accessibility["W20_percent"] = communes_territorial_accessibility["W20"] / communes_territorial_accessibility["W20"].max()
communes_territorial_accessibility.head(3)
communes_territorial_accessibility["Placówki wychowania przedszkolnego (bez oddzia..._percent of max"] = communes_territorial_accessibility["W1"] / communes_territorial_accessibility["W1"].max()
communes_territorial_accessibility["Placówki wychowania przedszkolnego_percent of max"] = communes_territorial_accessibility["W2"] / communes_territorial_accessibility["W2"].max()
communes_territorial_accessibility["Szkoły podstawowe dla dzieci_percent of max"] = communes_territorial_accessibility["W3"] / communes_territorial_accessibility["W3"].max()
communes_territorial_accessibility["Szkoły ponadgimnazjalne dla młodzieży (zasadni..._percent of max"] = communes_territorial_accessibility["W4"] / communes_territorial_accessibility["W4"].max()
communes_territorial_accessibility["Komendy/komisariaty/posterunki policji_percent of max"] = communes_territorial_accessibility["W5"] / communes_territorial_accessibility["W5"].max()
communes_territorial_accessibility["Straż pożarna_percent of max"] = communes_territorial_accessibility["W6"] / communes_territorial_accessibility["W6"].max()
communes_territorial_accessibility["Biblioteki publiczne (łącznie z filiami dla dz..._percent of max"] = communes_territorial_accessibility["W7"] / communes_territorial_accessibility["W7"].max()
communes_territorial_accessibility["Biblioteki publiczne (bez filii dla dzieci)_percent of max"] = communes_territorial_accessibility["W8"] / communes_territorial_accessibility["W8"].max()
communes_territorial_accessibility["Centra kultury, domy kultury, ośrodki kultury,..._percent of max"] = communes_territorial_accessibility["W9"] / communes_territorial_accessibility["W9"].max()
communes_territorial_accessibility["Przychodnie zdrowia świadczące usługi poz (łąc..._percent of max"] = communes_territorial_accessibility["W10"] / communes_territorial_accessibility["W10"].max()
communes_territorial_accessibility["Przychodnie zdrowia świadczące usługi poz (bez..._percent of max"] = communes_territorial_accessibility["W11"] / communes_territorial_accessibility["W11"].max()
communes_territorial_accessibility["Szpitale kliniczne (łącznie z dziecięcymi)_percent of max"] = communes_territorial_accessibility["W12"] / communes_territorial_accessibility["W12"].max()
communes_territorial_accessibility["Szpitale kliniczne (bez dziecięcych)_percent of max"] = communes_territorial_accessibility["W13"] / communes_territorial_accessibility["W13"].max()
communes_territorial_accessibility["Stacje pogotowia ratunkowego_percent of max"] = communes_territorial_accessibility["W14"] / communes_territorial_accessibility["W14"].max()
communes_territorial_accessibility["Szpitalne oddziały ratunkowe (łącznie z dzieci..._percent of max"] = communes_territorial_accessibility["W15"] / communes_territorial_accessibility["W15"].max()
communes_territorial_accessibility["Szpitalne oddziały ratunkowe (bez dziecięcych)_percent of max"] = communes_territorial_accessibility["W16"] / communes_territorial_accessibility["W16"].max()
communes_territorial_accessibility["Centra urazowe_percent of max"] = communes_territorial_accessibility["W17"] / communes_territorial_accessibility["W17"].max()
communes_territorial_accessibility["Urzędy gminne/miejskie/dzielnicowe (w ramach j..._percent of max"] = communes_territorial_accessibility["W18"] / communes_territorial_accessibility["W18"].max()
communes_territorial_accessibility["Starostwa powiatowe + urzędy miejskie w miasta..._percent of max"] = communes_territorial_accessibility["W19"] / communes_territorial_accessibility["W19"].max()
communes_territorial_accessibility["Sądy rejonowe (w ramach właściwości miejscowej)_percent of max"] = communes_territorial_accessibility["W20"] / communes_territorial_accessibility["W20"].max()
communes_territorial_accessibility.head(3)
tst = communes_territorial_accessibility.columns.values.tolist()
tst[-20:]
communes_territorial_accessibility_columns = communes_territorial_accessibility.columns.values.tolist()
for i in communes_territorial_accessibility_columns[-20:]:
plot = sns.jointplot(x=i, y='Dudas percentage of votes', data=communes_territorial_accessibility, kind='reg', marker="+", color="r")
plot.ax_joint.axhline(y=0.5)
plot.ax_joint.axvline(x=0.5)
plot.ax_marg_x.set_xlim(0, 1)
plot.ax_marg_y.set_ylim(0, 1)
#sns.jointplot(x=i, y='Trzaskowskis percentage of votes', data=communes_territorial_accessibility, kind='reg', marker="+", color="b")